# This script creates all graphs and figures related to countries' association with chaos and corruption

# Loading prerequisites
# NOTE(review): removes every object from the calling workspace before the
# script runs; it has no effect when the script is started in a fresh session.
rm(list = ls())

library(tidyr)
library(dplyr)
library(purrr)
library(data.table)
library(showtext)
library(ggplot2)
library(modelsummary)
library(sandwich)
library(lmtest)
library(fwildclusterboot)
library(fixest)
library(kableExtra)
library(marginaleffects)
library(flextable)
library(ggrepel)
library(extrafont)
library(ggthemes)

# Setting fonts
# Location of the EB Garamond 12 regular face that is registered below.
font_location <- file.path("/usr/share/fonts/truetype/ebgaramond",
                           "EBGaramond12-Regular.ttf")

# Import fonts matching the pattern "Garamond" without an interactive prompt.
font_import(prompt = FALSE, pattern = "Garamond")
# Register the font file under the family name used in the plot themes below.
font_add("EB Garamond 12", regular = font_location)


# Functions

#' Confidence intervals for the last model coefficient under five variance
#' estimators.
#'
#' The interval and the point estimate are both taken from the LAST
#' coefficient of `reg` (last row of `confint()`, last element of
#' `reg$coefficients`).
#'
#' @param reg A fitted model accepted by `coeftest()`, `vcovCL()` and
#'   `vcovBS()`; its data must contain `Dictionary1` and `Dictionary2`, which
#'   are used as cluster variables.
#' @param label Value stored in the `label` column of the result.
#' @param r Number of wild-bootstrap replications (passed to `vcovBS()` as `R`).
#' @param core Number of cores for the bootstrap (passed to `vcovBS()` as
#'   `cores`). Previously this argument was ignored and `core = 4` was
#'   hard-coded in the calls.
#' @return A data frame with five rows (one per estimator) and the columns
#'   `Pct_2.5`, `Pct_97.5`, `coef`, `label` and `clustering`.
conf_int_fun <- function(reg, label, r = 250, core = 1) {
  # Interval of the last coefficient for a given covariance matrix.
  last_ci <- function(vc) {
    ci <- confint(coeftest(reg, vcov. = vc))
    ci[nrow(ci), ]
  }
  # Wild bootstrap covariance clustered on `cluster`, honouring `r` and `core`.
  boot_vcov <- function(cluster) {
    vcovBS(reg, cluster = cluster, type = "wild", R = r, cores = core)
  }

  # Rows are evaluated in this order, so the two bootstrap draws happen in the
  # same sequence as before (country dictionary first, attribute second).
  ci <- rbind(
    last_ci(vcovCL(reg, type = "HC1")),
    last_ci(vcovCL(reg, type = "HC1", cluster = ~Dictionary1)),
    last_ci(boot_vcov(~Dictionary1)),
    last_ci(vcovCL(reg, type = "HC1", cluster = ~Dictionary2)),
    last_ci(boot_vcov(~Dictionary2))
  )

  # unname(): the single named coefficient is recycled over five rows; keeping
  # its name only triggers a "row names ... discarded" warning in data.frame().
  last_coef <- unname(reg$coefficients[length(reg$coefficients)])

  out <- data.frame(ci,
                    coef = last_coef,
                    label,
                    clustering = c("Heteroskedastic",
                                   "Cluster SE (Country Dict)",
                                   "Bootstrap (Country Dict)",
                                   "Cluster SE (Attribute Dict)",
                                   "Bootstrap (Attribute Dict)"))
  names(out)[1:2] <- c("Pct_2.5", "Pct_97.5")
  out
}

#' Coefficient tests for one model under five variance estimators.
#'
#' @param reg A fitted model accepted by `coeftest()`, `vcovCL()` and
#'   `vcovBS()`; its data must contain `Dictionary1` and `Dictionary2`, which
#'   are used as cluster variables.
#' @param r Number of wild-bootstrap replications (passed to `vcovBS()` as `R`).
#' @param core Number of cores for the bootstrap (passed to `vcovBS()` as
#'   `cores`). Previously this argument was ignored and `core = 4` was
#'   hard-coded in the calls.
#' @return An unnamed list of five `coeftest()` results, in the order:
#'   HC1, cluster on `Dictionary1`, wild bootstrap on `Dictionary1`,
#'   cluster on `Dictionary2`, wild bootstrap on `Dictionary2`.
se_reg_fun <- function(reg, r = 250, core = 2) {
  # Wild bootstrap covariance clustered on `cluster`, honouring `r` and `core`.
  boot_vcov <- function(cluster) {
    vcovBS(reg, cluster = cluster, type = "wild", R = r, cores = core)
  }

  # The list elements are built in order, so the two bootstrap draws happen in
  # the same sequence as before.
  list(
    coeftest(reg, vcov. = vcovCL(reg, type = "HC1")),
    coeftest(reg, vcov. = vcovCL(reg, type = "HC1", cluster = ~Dictionary1)),
    coeftest(reg, vcov. = boot_vcov(~Dictionary1)),
    coeftest(reg, vcov. = vcovCL(reg, type = "HC1", cluster = ~Dictionary2)),
    coeftest(reg, vcov. = boot_vcov(~Dictionary2))
  )
}

#' Coefficient test with an HC1 covariance matrix clustered on `Dictionary1`.
#'
#' @param x A fitted model accepted by `vcovCL()` and `coeftest()`.
#' @return The result of `coeftest()` for `x`.
rob_reg <- function(x) {
  clustered_vcov <- vcovCL(x, cluster = ~Dictionary1, type = "HC1")
  coeftest(x, vcov = clustered_vcov)
}

#' Turn a `modelsummary(..., output = "data.frame")` table into a kable.
#'
#' Blanks the `term` label on standard-error rows, drops the `part` and
#' `statistic` helper columns and renames `term` to a single space.
#'
#' @param x Data frame with at least the columns `term`, `statistic`, `part`.
#' @param cap Caption passed to `kbl()`.
#' @param totex If `TRUE` the kable format is "latex", otherwise "html".
#' @return The object returned by `kbl()`.
modelsummary_to_tex <- function(x, cap = NULL, totex = TRUE) {
  out_format <- ifelse(totex, "latex", "html")
  blanked <- mutate(x, term = ifelse(statistic == "std.error", "", term))
  trimmed <- select(blanked, -part, -statistic)
  relabelled <- rename(trimmed, ` ` = term)
  kbl(relabelled, format = out_format, booktabs = TRUE, caption = cap)
}


# Reading data
# NOTE(review): machine-specific path; every relative path used below
# (02_data/..., 03_output/...) is resolved against this directory.
replication_dir <- "~/Dropbox/Diaspora_Narratives/Submission/SecStud/Replication"
setwd(replication_dir)

# Country-level data used by all graphs and regressions in this script.
df <- fread("02_data/country_data.csv")

### Graphs

# Mean similarity (in %) by concept, government label and democracy bin, with
# a 95% t-interval for the mean and, within each concept x democracy bin, the
# difference between the first and second row (rows are ordered by the
# summarise keys, i.e. by Government_lab).
df_sum <- df |>
  group_by(Concept2, Government_lab, Democracy_bin) |>
  filter(!is.na(Democracy_bin)) |>
  summarize(Similarity = 100 * mean(Similarity_mean, na.rm = TRUE),
            Similarity_sd = 100 * sd(Similarity_mean, na.rm = TRUE),
            # Count only the observations that enter mean() and sd()
            N = sum(!is.na(Similarity_mean)),
            .groups = "drop_last") |>
  mutate(Similarity_se = Similarity_sd / sqrt(N),
         # A t-interval for a mean has N - 1 degrees of freedom. pmax() only
         # avoids a NaN warning for N <= 1, where the sd is NA anyway.
         Similarity_tcrit = qt(0.975, pmax(N - 1, 1)),
         Similarity_upper = Similarity + Similarity_tcrit * Similarity_se,
         Similarity_lower = Similarity - Similarity_tcrit * Similarity_se
         ) |>
  select(-Similarity_tcrit) |>
  group_by(Concept2, Democracy_bin) |>
  mutate(Similarity_diff = Similarity[1] - Similarity[2]) |>
  ungroup()

# Similarity by democracy bin, one line per government label and one facet row
# per concept, with the interval from df_sum drawn as a grey ribbon.
plot_country_sim <-
  df_sum |>
  ggplot(aes(x = Democracy_bin, y = Similarity, color = Government_lab, group = Government_lab)) +
  geom_ribbon(aes(ymin = Similarity_lower, ymax = Similarity_upper), fill = "grey", color = "grey", show.legend = FALSE) +
  geom_line() +
  scale_x_continuous(breaks = seq(0.1, 1, 0.1)) +
  scale_color_grey() +
  facet_grid(Concept2 ~ .) +
  labs(color = "Affiliation", x = "Democracy (V-Dem)", y = "Similarity (%)") +
  theme_bw() +
  theme(legend.position = "bottom", text = element_text(family = "EB Garamond 12", size = 10))

plot_country_sim
# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("03_output/Graphs/country_similarity_regime.pdf",
       plot = plot_country_sim, width = 7, height = 7)
ggsave("03_output/Graphs/country_similarity_regime.png",
       plot = plot_country_sim, width = 7, height = 7)

# Similarity_diff from df_sum by democracy bin, one line per concept.
# NOTE(review): colour maps Concept2 but the legend title reads "Affiliation",
# and the y axis shows a difference but is titled "Similarity (%)" - confirm
# the intended titles before changing them.
plot_country_diff_regime <-
  df_sum |>
  ggplot(aes(x = Democracy_bin, y = Similarity_diff, color = Concept2)) +
  geom_line() +
  scale_x_continuous(breaks = seq(0.1, 0.9, 0.1)) +
  scale_color_grey() +
  labs(color = "Affiliation", x = "Democracy (V-Dem)", y = "Similarity (%)") +
  theme_bw() +
  theme(legend.position = "bottom", text = element_text(family = "EB Garamond 12", size = 12))

plot_country_diff_regime
# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("03_output/Graphs/country_difference_regime.pdf",
       plot = plot_country_diff_regime, width = 7, height = 7)

ggsave("03_output/Graphs/country_difference_regime.png",
       plot = plot_country_diff_regime, width = 7, height = 7)

# By subscription account

# Same summary as df_sum, additionally split by Eng and lab (presumably the
# subscription account and its plot label - confirm against the data), with
# the "Sports" concept removed.
df_sum_act <- df |>
  group_by(Concept2, Eng, Democracy_bin, Government_lab, lab) |>
  filter(!is.na(Democracy_bin)) |>
  summarize(Similarity = 100 * mean(Similarity_mean, na.rm = TRUE),
            Similarity_sd = 100 * sd(Similarity_mean, na.rm = TRUE),
            # Count only the observations that enter mean() and sd()
            N = sum(!is.na(Similarity_mean)),
            .groups = "drop_last") |>
  mutate(Similarity_se = Similarity_sd / sqrt(N),
         # A t-interval for a mean has N - 1 degrees of freedom. pmax() only
         # avoids a NaN warning for N <= 1, where the sd is NA anyway.
         Similarity_tcrit = qt(0.975, pmax(N - 1, 1)),
         Similarity_upper = Similarity + Similarity_tcrit * Similarity_se,
         Similarity_lower = Similarity - Similarity_tcrit * Similarity_se
         ) |>
  select(-Similarity_tcrit) |>
  group_by(Concept2, Democracy_bin) |>
  filter(Concept2 != "Sports") |>
  mutate(Similarity_diff = Similarity[1] - Similarity[2]) |>
  ungroup()

# Same summary as df_sum_act, but split by Eng_general / lab_general instead
# of Eng / lab; the "Sports" concept is removed.
df_sum_act_gen <- df |>
  group_by(Concept2, Democracy_bin, Government_lab, Eng_general, lab_general) |>
  filter(!is.na(Democracy_bin)) |>
  summarize(Similarity = 100 * mean(Similarity_mean, na.rm = TRUE),
            Similarity_sd = 100 * sd(Similarity_mean, na.rm = TRUE),
            # Count only the observations that enter mean() and sd()
            N = sum(!is.na(Similarity_mean)),
            .groups = "drop_last") |>
  mutate(Similarity_se = Similarity_sd / sqrt(N),
         # A t-interval for a mean has N - 1 degrees of freedom. pmax() only
         # avoids a NaN warning for N <= 1, where the sd is NA anyway.
         Similarity_tcrit = qt(0.975, pmax(N - 1, 1)),
         Similarity_upper = Similarity + Similarity_tcrit * Similarity_se,
         Similarity_lower = Similarity - Similarity_tcrit * Similarity_se
         ) |>
  select(-Similarity_tcrit) |>
  group_by(Concept2, Democracy_bin) |>
  filter(Concept2 != "Sports") |>
  mutate(Similarity_diff = Similarity[1] - Similarity[2]) |>
  ungroup()

# Loess-smoothed similarity by democracy bin, one line per Eng value, faceted
# by concept (rows) and government label (columns); `lab` supplies the
# repelled text labels.
plot_sim_regime <- df_sum_act |>
  ggplot(aes(x = Democracy_bin, y = Similarity, color = Eng, linetype = Eng, group = Eng)) +
  # FALSE instead of F: `F` is an ordinary variable that can be reassigned
  geom_smooth(method = "loess", se = FALSE) +
  geom_text_repel(aes(label = lab),
                  box.padding = 0.5,
                  na.rm = TRUE,
                  show.legend = FALSE) +
  scale_x_continuous(breaks = seq(0.1, 1, 0.1)) +
  scale_color_grey() +
  facet_grid(Concept2 ~ Government_lab) +
  labs(color = "Affiliation",
       x = "Democracy (V-Dem)",
       y = "Similarity (%)",
       linetype = "Affiliation") +
  theme_bw() +
  theme(legend.position = "bottom", text = element_text(family = "EB Garamond 12", size = 10))

plot_sim_regime
# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("03_output/Graphs/sa_similarity_regime.pdf",
       plot = plot_sim_regime, width = 7, height = 7)
ggsave("03_output/Graphs/sa_similarity_regime.png",
       plot = plot_sim_regime, width = 7, height = 7)

# Same figure as plot_sim_regime, built from df_sum_act_gen with the
# Eng_general / lab_general columns.
plot_regime_act_summed <-
  df_sum_act_gen |>
  filter(Concept2 != "Sports") |>
  ggplot(aes(x = Democracy_bin, y = Similarity, color = Eng_general, linetype = Eng_general, group = Eng_general)) +
  # FALSE instead of F: `F` is an ordinary variable that can be reassigned
  geom_smooth(method = "loess", se = FALSE) +
  geom_text_repel(aes(label = lab_general), box.padding = 0.5,
                  na.rm = TRUE,
                  show.legend = FALSE) +
  scale_x_continuous(breaks = seq(0.1, 1, 0.1)) +
  scale_color_grey() +
  facet_grid(Concept2 ~ Government_lab) +
  labs(color = "Affiliation",
       x = "Democracy (V-Dem)",
       y = "Similarity (%)",
       linetype = "Affiliation") +
  theme_bw() +
  theme(legend.position = "bottom", text = element_text(family = "EB Garamond 12", size = 10))

plot_regime_act_summed
# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("03_output/Graphs/sa_similarity_regime_summed.pdf",
       plot = plot_regime_act_summed, width = 7, height = 7)
ggsave("03_output/Graphs/sa_similarity_regime_summed.png",
       plot = plot_regime_act_summed, width = 7, height = 7)


### Regression Tables

# Default font for every flextable (docx) table produced below.
set_flextable_defaults(font.family = "Times New Roman")

# Italic text properties shared by the header paragraphs.
fp_text <- fp_text_default(italic = TRUE)
# Header paragraphs for the flextable tables; the leading "" covers the
# term column.
pars_dv_w <- as_paragraph(as_chunk(c("","Similarity with Country Dictionary (%)"), props = fp_text))
pars_dv_wo <- as_paragraph(as_chunk(c("","Similarity (%)"), props = fp_text))
# Column-group header of the standard-error table. The groups are the Chaos
# and Corruption models (the LaTeX version of that table uses the same two
# labels); the previous "Racism"/"Violence" labels did not match the models.
pars_type <- as_paragraph(as_chunk(c("","Chaos","Corruption")))

# Table notes: LaTeX and plain-text (docx) variants, for the clustered and
# the wild-bootstrap tables.
table_note <- list("Notes: $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01 White robust standard-errors clustered on country in parentheses. Dependent variable is cosine similarity between country and respective attribute dictionaries. Controls for dictionary term frequency are shown. Unit of analysis is the object-attribute word pair, which varies according to the object and attribute dictionary size.")
table_note_html <- list("Notes: * p<0.1; ** p<0.05; *** p<0.01 White robust standard-errors clustered on country in parentheses. Dependent variable is cosine similarity between country and respective attribute dictionaries. Controls for dictionary term frequency are shown. Unit of analysis is the object-attribute word pair, which varies according to the object and attribute dictionary size.")
table_note_wild <- list("Notes: $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01 Regressions labeled with ``Boot\" use Wild Bootstrapping to compute standard errors. Dependent variable is cosine similarity between country and respective attribute dictionaries. Controls for dictionary term frequency are shown. Unit of analysis is the object-attribute word pair, which varies according to the object and attribute dictionary size.")
table_note_wild_html <- list("Notes: * p<0.1; ** p<0.05; *** p<0.01 Regressions labeled with \"Boot\" use Wild Bootstrapping to compute standard errors. Dependent variable is cosine similarity between country and respective attribute dictionaries. Controls for dictionary term frequency are shown. Unit of analysis is the object-attribute word pair, which varies according to the object and attribute dictionary size.")

# Significance thresholds used for the stars in every table.
star_map <- c('*' = 0.1, '**' = 0.05, '***' = 0.01)

# One data set per concept; each regression below is fitted on one of them.
df_chaos <- filter(df, Concept2 == "Chaos")
df_corruption <- filter(df, Concept2 == "Corruption")
df_sports <- filter(df, Concept2 == "Sports")



# Main specification (liberal democracy index), one model per concept.
# rob_reg() turns each fit into a coefficient test with standard errors
# clustered on Dictionary1.
reg_chaos <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_chaos)
)

reg_corruption <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_corruption)
)

reg_sports <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_sports)
)


# Table 1
# Display labels, keyed by the coefficient names of the regressions.
dict <- c(
  "Government" = "Government Accounts",
  "v2x_libdem" = "Democracy (vdem)",
  "Government:v2x_libdem" = "Govt. Acct. x Democracy",
  "log(nDict1_mean)" = "ln Frequency 1",
  "log(nDict2_mean)" = "ln Frequency 2"
)

# Column headers "Chaos (1)", "Corruption (2)", "Sports (3)".
model_labels <- paste0(c("Chaos", "Corruption", "Sports"), " (", 1:3, ")")
model_list <- setNames(list(reg_chaos, reg_corruption, reg_sports), model_labels)

# Goodness-of-fit rows requested from modelsummary.
gm <- tribble(
  ~raw,        ~clean,         ~fmt,
  "r.squared", "R Squared",    2,
  "nobs",      "Observations", 0
)

# Manually added fixed-effect indicator rows, one column per model.
rows_gen <- setNames(
  tibble::tribble(
    ~m0, ~m1, ~m2, ~m3,
    "Subscription Account", "Yes", "Yes", "Yes",
    "Year", "Yes", "Yes", "Yes",
    "Dictionary", "Yes", "Yes", "Yes"
  ),
  c("term", model_labels)
)


# Build the table as a data frame first, then style it as a LaTeX kable.
tab1_frame <- modelsummary(
  model_list,
  output = "data.frame",
  coef_map = dict,
  coef_omit = "^Eng|Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_gen,
  escape = FALSE # keeps the LaTeX '\label' in the caption from being escaped
)

tab1 <- tab1_frame |>
  modelsummary_to_tex(cap = "Effect of Regime Type on Framing by Attribute\\label{tab:regimereg}", totex = TRUE) |>
  kable_styling(latex_options = c("hold_position")) |>
  kableExtra::footnote(general = table_note, escape = FALSE, threeparttable = TRUE, general_title = "") |>
  # Rows 1-10: coefficients and standard errors; 11: statistics; 12-14: FE rows
  row_spec(10, hline_after = TRUE) |>
  pack_rows("Statistics", 11, 11, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  pack_rows("Fixed effects", 12, 14, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  add_header_above(c(" " = 1, "Similarity with Country Dictionary (%)" = 3), underline = FALSE, italic = TRUE, line = FALSE)

save_kable(tab1, file = "03_output/Tables/table_country_concept_reg.tex")

### Table 1: Flextable

# Word (docx) version of Table 1, built from the same models and labels.
tab1_flex_base <- modelsummary(
  model_list,
  output = "flextable",
  title = "Effect of Regime Type on Framing by Attribute",
  notes = table_note_html,
  coef_map = dict,
  coef_omit = "^Eng|Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_gen,
  escape = FALSE
)

tab1_flex <- tab1_flex_base |>
  # Rule below the coefficient rows (row 10)
  hline(i = 10) |>
  # Dependent-variable header spanning all model columns
  add_header_row(values = pars_dv_w,
                 colwidths = c(1, length(model_list)),
                 top = TRUE) |>
  align_text_col(align = "center", header = TRUE, footer = FALSE)

save_as_docx(tab1_flex, path = "03_output/Tables/table_country_concept_reg.docx")


# Additional tables

## Robustness alt dem: Poly

# Same specification as the main models with v2x_polyarchy as the democracy
# measure; rob_reg() clusters the standard errors on Dictionary1.
reg_chaos_poly <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_polyarchy + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_chaos)
)

reg_corruption_poly <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_polyarchy + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_corruption)
)

reg_sports_poly <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_polyarchy + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_sports)
)

# Table 2
# Display labels, keyed by the coefficient names of the polyarchy regressions.
dict <- c(
  "Government" = "Government Accounts",
  "v2x_polyarchy" = "Democracy (Polyarchy)",
  "Government:v2x_polyarchy" = "Govt. Acct. x Democracy",
  "log(nDict1_mean)" = "ln Frequency 1",
  "log(nDict2_mean)" = "ln Frequency 2"
)
model_labels_poly <- paste0(c("Chaos", "Corruption", "Sports"), " (", 1:3, ")")
model_list_poly <- setNames(
  list(reg_chaos_poly, reg_corruption_poly, reg_sports_poly),
  model_labels_poly
)

# Goodness-of-fit rows requested from modelsummary.
gm <- tribble(
  ~raw,        ~clean,         ~fmt,
  "r.squared", "R Squared",    2,
  "nobs",      "Observations", 0
)

# Fixed-effect indicator rows, relabelled for this table's columns.
rows_poly <- setNames(rows_gen, c("term", model_labels_poly))

# Build the table as a data frame first, then style it as a LaTeX kable.
tab_poly_frame <- modelsummary(
  model_list_poly,
  output = "data.frame",
  coef_map = dict,
  coef_omit = "^Eng|Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_poly,
  escape = FALSE
)

tab_poly <- tab_poly_frame |>
  modelsummary_to_tex(cap = "Effect of Regime Type (Polyarchy) on Framing by Attribute\\label{tab:regimeregpoly}", totex = TRUE) |>
  kable_styling(latex_options = c("hold_position")) |>
  kableExtra::footnote(general = table_note, escape = FALSE, threeparttable = TRUE, general_title = "") |>
  # Rows 1-10: coefficients and standard errors; 11: statistics; 12-14: FE rows
  row_spec(10, hline_after = TRUE) |>
  pack_rows("Statistics", 11, 11, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  pack_rows("Fixed effects", 12, 14, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  add_header_above(c(" " = 1, "Similarity with Country Dictionary (%)" = 3), underline = FALSE, italic = TRUE, line = FALSE)

save_kable(tab_poly, file = "03_output/Tables/table_country_concept_reg_poly.tex")

# Word (docx) version of the polyarchy table.
tab_poly_flex <-
  modelsummary(model_list_poly,
               coef_map = dict,
               coef_omit = c("^Eng|Year|^Dictionary"),
               estimate = "{estimate}{stars}",
               gof_map = gm,
               add_rows = rows_poly,
               fmt = "%.2f",
               stars = star_map,
               output = 'flextable',
               title = "Effect of Regime Type (Polyarchy) on Framing by Attribute",
               notes = table_note_html,
               escape = FALSE
               ) |>
  # Rule below the coefficient rows (row 10)
  hline(10) |>
  # The header must span this table's own models; the width was previously
  # taken from model_list (the Table 1 list), which only matched by accident.
  add_header_row(values = pars_dv_w,
                 colwidths = c(1, length(model_list_poly)), top = TRUE) |>
  align_text_col(align = "center", header = TRUE, footer = FALSE)

save_as_docx(tab_poly_flex, path = "03_output/Tables/table_country_concept_reg_poly.docx")

## Robustness alt dem: delib

# Same specification as the main models with v2x_delibdem as the democracy
# measure; rob_reg() clusters the standard errors on Dictionary1.
reg_chaos_delib <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_delibdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_chaos)
)

reg_corruption_delib <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_delibdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_corruption)
)

reg_sports_delib <- rob_reg(
  lm(Similarity_mean*100 ~ Government * v2x_delibdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
     data = df_sports)
)


# Display labels, keyed by the coefficient names of the deliberative-democracy
# regressions.
dict <- c(
  "Government" = "Government Accounts",
  "v2x_delibdem" = "Democracy (Deliberative)",
  "Government:v2x_delibdem" = "Govt. Acct. x Democracy",
  "log(nDict1_mean)" = "ln Frequency 1",
  "log(nDict2_mean)" = "ln Frequency 2"
)
model_labels_delib <- paste0(c("Chaos", "Corruption", "Sports"), " (", 1:3, ")")
model_list_delib <- setNames(
  list(reg_chaos_delib, reg_corruption_delib, reg_sports_delib),
  model_labels_delib
)

# Goodness-of-fit rows requested from modelsummary.
gm <- tribble(
  ~raw,        ~clean,         ~fmt,
  "r.squared", "R Squared",    2,
  "nobs",      "Observations", 0
)

# Fixed-effect indicator rows, relabelled for this table's columns.
rows_delib <- setNames(rows_gen, c("term", model_labels_delib))


# Build the table as a data frame first, then style it as a LaTeX kable.
tab_delib_frame <- modelsummary(
  model_list_delib,
  output = "data.frame",
  coef_map = dict,
  coef_omit = "^Eng|Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_delib,
  escape = FALSE
)

tab_delib <- tab_delib_frame |>
  modelsummary_to_tex(cap = "Effect of Regime Type (Deliberative Democracy) on Framing by Attribute\\label{tab:regimeregdelib}", totex = TRUE) |>
  kable_styling(latex_options = c("hold_position")) |>
  kableExtra::footnote(general = table_note, escape = FALSE, threeparttable = TRUE, general_title = "") |>
  # Rows 1-10: coefficients and standard errors; 11: statistics; 12-14: FE rows
  row_spec(10, hline_after = TRUE) |>
  pack_rows("Statistics", 11, 11, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  pack_rows("Fixed effects", 12, 14, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  add_header_above(c(" " = 1, "Similarity with Country Dictionary (%)" = 3), underline = FALSE, italic = TRUE, line = FALSE)

save_kable(tab_delib, file = "03_output/Tables/table_country_concept_reg_delib.tex")

# Word (docx) version of the deliberative-democracy table.
tab_delib_flex <-
  modelsummary(model_list_delib,
               coef_map = dict,
               coef_omit = c("^Eng|Year|^Dictionary"),
               estimate = "{estimate}{stars}",
               gof_map = gm,
               add_rows = rows_delib,
               fmt = "%.2f",
               stars = star_map,
               output = 'flextable',
               title = "Effect of Regime Type (Deliberative Democracy) on Framing by Attribute",
               notes = table_note_html,
               escape = FALSE
               ) |>
  # Rule below the coefficient rows (row 10)
  hline(10) |>
  # The header must span this table's own models; the width was previously
  # taken from model_list (the Table 1 list), which only matched by accident.
  add_header_row(values = pars_dv_w,
                 colwidths = c(1, length(model_list_delib)), top = TRUE) |>
  align_text_col(align = "center", header = TRUE, footer = FALSE)

save_as_docx(tab_delib_flex, path = "03_output/Tables/table_country_concept_reg_delib.docx")


# Standard error analysis

# Plain lm fits (no robust wrapper) of the main specification; the different
# standard-error estimators are applied to them afterwards.
reg_chaos_lm <- lm(
  Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
  data = df_chaos
)

reg_corruption_lm <- lm(
  Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2,
  data = df_corruption
)

regressions <- list(Chaos = reg_chaos_lm, Corruption = reg_corruption_lm)



# Five coefficient tests (one per standard-error type) for every model in
# `regressions`, flattened into a single list: all Chaos columns first, then
# all Corruption columns.
out_reg <- lapply(unname(regressions), function(m) se_reg_fun(reg = m)) |>
  unlist(recursive = FALSE)

# Column stems, in the order in which se_reg_fun() returns its results.
name_dict <- c("Hetero.",
               "Cluster (Obj)", "Boot (Obj)",
               "Cluster (Attr)", "Boot (Attr)")

# Number the columns consecutively; the model count is taken from
# `regressions` instead of being hard-coded as 2.
n_models <- length(regressions)
model_labels_se <- paste0(rep(name_dict, n_models),
                          " (", seq_len(length(name_dict) * n_models), ")")
names(out_reg) <- model_labels_se

# Display labels, keyed by the coefficient names of the regressions.
dict <- c("Government Accounts", "Democracy (vdem)", "Govt. Acct. x Democracy", "ln Frequency 1", "ln Frequency 2")

names(dict) <- c("Government", "v2x_libdem", "Government:v2x_libdem", "log(nDict1_mean)", "log(nDict2_mean)")

model_list <- out_reg
# Goodness-of-fit rows requested from modelsummary.
gm <- tribble(~raw, ~clean, ~fmt,
              "r.squared", "R Squared", 2,
              "nobs", "Observations", 0)

# Fixed-effect indicator rows: "Yes" in every model column.
yes_flags <- rep("Yes", times = length(out_reg))
rows_se <- rbind(c('Subscription Account', yes_flags),
                 c('Year', yes_flags),
                 c('Dictionary FE', yes_flags)) |>
  as_tibble()

names(rows_se) <- c('term', model_labels_se)

# LaTeX table comparing the standard-error types: build the table as a data
# frame first, then style it as a landscape kable.
tab_se_frame <- modelsummary(
  model_list,
  output = "data.frame",
  coef_map = dict,
  coef_omit = "^Eng|^Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_se,
  notes = table_note_wild,
  escape = FALSE
)

tab_se <- tab_se_frame |>
  modelsummary_to_tex(cap = "Government Framing of Countries Conditioning on Regime by Standard Error Type\\label{tab:polityse}", totex = TRUE) |>
  kable_styling(latex_options = c("hold_position", "scale_down")) |>
  kableExtra::footnote(general = table_note_wild, escape = FALSE, threeparttable = TRUE, general_title = "") |>
  # Rows 1-10: coefficients and standard errors; 11: statistics; 12-14: FE rows
  row_spec(10, hline_after = TRUE) |>
  pack_rows("Statistics", 11, 11, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  pack_rows("Fixed effects", 12, 14, bold = FALSE, italic = TRUE, label_row_css = "", latex_gap_space = "") |>
  # Five columns per concept, then one header spanning all ten columns
  add_header_above(c(" " = 1, "Chaos" = 5, "Corruption" = 5)) |>
  add_header_above(c(" " = 1, "Similarity with Country Dictionary (%)" = 10), line = FALSE, italic = TRUE) |>
  landscape()

save_kable(tab_se, file = "03_output/Tables/polity_table_se.tex")

# Word (docx) version of the standard-error comparison table.
tab_se_flex_base <- modelsummary(
  model_list,
  output = "flextable",
  title = "Government Framing of Countries Conditioning on Regime by Standard Error Type",
  notes = table_note_wild_html,
  coef_map = dict,
  coef_omit = "^Eng|^Year|^Dictionary",
  estimate = "{estimate}{stars}",
  stars = star_map,
  fmt = "%.2f",
  gof_map = gm,
  add_rows = rows_se,
  escape = FALSE
)

tab_se_flex <- tab_se_flex_base |>
  # Rule below the coefficient rows (row 10)
  hline(i = 10) |>
  # Group header: five columns per group, labels taken from pars_type
  add_header_row(values = pars_type, colwidths = c(1, 5, 5), top = TRUE) |>
  align_text_col(align = "center", header = TRUE, footer = FALSE) |>
  # Dependent-variable header spanning all ten model columns
  add_header_row(values = pars_dv_w, colwidths = c(1, 10), top = TRUE)

save_as_docx(tab_se_flex, path = "03_output/Tables/polity_table_se.docx")


# Confidence intervals of the last coefficient under each standard-error type
# (input for the robustness figure below)


reg_chaos_lm <- lm(Similarity_mean*100 ~ Government * v2x_libdem  + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2, data = df_chaos)

reg_corruption_lm <- lm(Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2, data = df_corruption)

reg_sports_lm <- lm(Similarity_mean*100 ~ Government * v2x_libdem + log(nDict1_mean) + log(nDict2_mean) + Eng + as.factor(Year) + Dictionary1 + Dictionary2, data = df_sports)


regressions <- list(reg_chaos_lm, reg_corruption_lm, reg_sports_lm)
names(regressions) <- c("Chaos", "Corruption", "Sports")

# One five-row data frame per model, labelled with the model's name; iterating
# over the names avoids the 1:length() index loop.
out <- lapply(names(regressions), function(nm) {
  conf_int_fun(reg = regressions[[nm]], label = nm)
})
out_df <- do.call("rbind", out)

# Point estimate and confidence interval of the coefficient per standard-error
# type (one row per type, one facet per concept) with a reference line at 0.
plot_se_robust <-
  ggplot(data = out_df, aes(y = clustering, x = coef, color = clustering, linetype = clustering)) +
  geom_point() +
  geom_errorbar(aes(xmin = Pct_2.5, xmax = Pct_97.5), width=.1) +
  geom_vline(aes(xintercept = 0), color = "red") +
  facet_grid(. ~ label) +
  theme_bw() +
  scale_color_grey() +
  # The coefficient is mapped to x, so its title belongs on the x axis; the y
  # axis only separates the standard-error types (its text is hidden below).
  # NOTE(review): "with Diaspora" may be left over from another script -
  # confirm the wording.
  labs(linetype = "Standard Error", x = "Change in Similarity (%) with Diaspora", y = "", color = "Standard Error") +
  theme(text = element_text(family = "EB Garamond 12", size = 12),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

plot_se_robust
# Pass the plot explicitly instead of relying on ggplot2's last_plot() state.
ggsave("03_output/Graphs/country_se_robustness.pdf",
       plot = plot_se_robust, height = 6, width = 9)
ggsave("03_output/Graphs/country_se_robustness.png",
       plot = plot_se_robust, height = 6, width = 9)
